{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 183,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "import math\n",
    "import numpy as np\n",
    "import operator\n",
    "\n",
    "movie_titles_map = {}\n",
    "movie_user_map={}\n",
    "user_movie_map={}\n",
    "user_avg_rating = {}\n",
    "cache = {}\n",
    "\n",
    "def read_movie_title(fname):\n",
    "    \"\"\"Load movie titles from a comma-separated file into ``movie_titles_map``.\n",
    "\n",
    "    Each non-blank line is read as ``movie_id,second_field,title`` (the second\n",
    "    field is presumably the release year). The value stored under the integer\n",
    "    movie id is ``title(second_field)``.\n",
    "\n",
    "    Args:\n",
    "        fname: Path of the titles file.\n",
    "\n",
    "    Raises:\n",
    "        ValueError: If the first field of a line is not an integer.\n",
    "        IndexError: If a line has fewer than three fields.\n",
    "    \"\"\"\n",
    "    with open(fname) as f:\n",
    "        # Iterate lazily instead of materialising the whole file with readlines().\n",
    "        for line in f:\n",
    "            if not line.strip():\n",
    "                # Tolerate blank lines, e.g. an empty line at the end of the file.\n",
    "                continue\n",
    "            # Split on the first two commas only: the title is the last field and\n",
    "            # may itself contain commas, which must not cut it short.\n",
    "            parts = [x.strip() for x in line.split(',', 2)]\n",
    "            movie_id = int(parts[0])\n",
    "            movie_titles_map[movie_id] = parts[2] + \"(\" + parts[1] + \")\"\n",
    "\n",
    "def get_ratings_map(fname):\n",
    "    \"\"\"Load ratings from a comma-separated file into the rating lookup tables.\n",
    "\n",
    "    Each non-blank line is read as ``movie_id,user_id,rating``. Both ids go\n",
    "    through float() before int(), so values written like ``8.0`` are accepted.\n",
    "\n",
    "    ``user_movie_map[user_id][movie_id]`` is set to the rating, and the user is\n",
    "    added to ``movie_user_map[movie_id]`` the first time that (user, movie)\n",
    "    pair is seen. A repeated pair only updates the rating, so duplicate rows,\n",
    "    or loading the same file twice on one kernel, do not make a user count\n",
    "    several times for the same movie.\n",
    "\n",
    "    Args:\n",
    "        fname: Path of the ratings file.\n",
    "\n",
    "    Raises:\n",
    "        ValueError: If a field cannot be parsed as a number.\n",
    "        IndexError: If a line has fewer than three fields.\n",
    "    \"\"\"\n",
    "    with open(fname) as f:\n",
    "        # Iterate lazily instead of materialising the whole file with readlines().\n",
    "        for line in f:\n",
    "            if not line.strip():\n",
    "                # Tolerate blank lines, e.g. an empty line at the end of the file.\n",
    "                continue\n",
    "            parts = [x.strip() for x in line.split(',')]\n",
    "            movie_title_id = int(float(parts[0]))\n",
    "            user_id = int(float(parts[1]))\n",
    "            rating = float(parts[2])\n",
    "\n",
    "            user_ratings = user_movie_map.setdefault(user_id, {})\n",
    "            # Decide before storing the rating whether this pair is new.\n",
    "            is_new_pair = movie_title_id not in user_ratings\n",
    "            user_ratings[movie_title_id] = rating\n",
    "\n",
    "            raters = movie_user_map.setdefault(movie_title_id, [])\n",
    "            if is_new_pair:\n",
    "                raters.append(user_id)\n",
    "\n",
    "def get_user_avg_rating():\n",
    "    \"\"\"Store every user's mean rating in ``user_avg_rating``.\n",
    "\n",
    "    Reads ``user_movie_map`` and writes one entry per user: the sum of that\n",
    "    user's ratings divided by how many movies the user rated.\n",
    "    \"\"\"\n",
    "    for user, ratings in user_movie_map.items():\n",
    "        total = 0\n",
    "        # Add the ratings one at a time, in dict order, coercing each to float.\n",
    "        for rating in ratings.values():\n",
    "            total += float(rating)\n",
    "        user_avg_rating[user] = total / len(ratings)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 184,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# Input files; the relative paths are opened from the kernel's working directory.\n",
    "movie_titles_filename = \"movie_titles.txt\"\n",
    "ratings_filename = \"ratings.txt\"\n",
    "\n",
    "# Fill the lookup tables first, then derive each user's mean rating from them.\n",
    "read_movie_title(movie_titles_filename)\n",
    "get_ratings_map(ratings_filename)\n",
    "get_user_avg_rating()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 185,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "def get_user_corr(active_user):\n",
    "    \"\"\"Correlate ``active_user`` with every other user over co-rated movies.\n",
    "\n",
    "    For each other user, the deviation of each rating from its owner's mean\n",
    "    rating is taken for every movie both users rated. The weight is the sum of\n",
    "    the deviation products divided by the square root of the product of the\n",
    "    two sums of squared deviations.\n",
    "\n",
    "    Args:\n",
    "        active_user: Id of the user to compare against; it is looked up in\n",
    "            ``user_movie_map`` and ``user_avg_rating``.\n",
    "\n",
    "    Returns:\n",
    "        dict mapping each other user id to its weight. Users whose denominator\n",
    "        is zero, which includes users sharing no movie with ``active_user``,\n",
    "        are left out.\n",
    "    \"\"\"\n",
    "    weights = {}\n",
    "    for other in user_movie_map:\n",
    "        if other == active_user:\n",
    "            continue\n",
    "        other_ratings = user_movie_map[other]\n",
    "        cross_sum = 0\n",
    "        active_sq_sum = 0\n",
    "        other_sq_sum = 0\n",
    "        for movie, active_rating in user_movie_map[active_user].items():\n",
    "            if movie not in other_ratings:\n",
    "                continue\n",
    "            # Each deviation is measured against that user's overall mean rating.\n",
    "            active_dev = active_rating - user_avg_rating[active_user]\n",
    "            other_dev = other_ratings[movie] - user_avg_rating[other]\n",
    "            cross_sum += active_dev * other_dev\n",
    "            active_sq_sum += np.power(active_dev, 2)\n",
    "            other_sq_sum += np.power(other_dev, 2)\n",
    "        norm = np.sqrt(active_sq_sum * other_sq_sum)\n",
    "        if norm != 0:\n",
    "            weights[other] = cross_sum / norm\n",
    "    return weights"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 186,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "def recommendation(active_user, K):\n",
    "    \"\"\"Print the titles of the ``K`` highest-scoring movies for ``active_user``.\n",
    "\n",
    "    A movie's score is the sum, over the users who rated it and have a\n",
    "    correlation weight with ``active_user``, of that weight times the user's\n",
    "    rating minus the user's mean rating. The complete ranking is kept in\n",
    "    ``cache`` per active user, so a later call only slices and prints.\n",
    "\n",
    "    Args:\n",
    "        active_user: Id of the user to recommend for.\n",
    "        K: How many titles to print.\n",
    "    \"\"\"\n",
    "    if active_user in cache:\n",
    "        ranking = cache[active_user]\n",
    "    else:\n",
    "        weights = get_user_corr(active_user)\n",
    "        scores = {}\n",
    "        for movie_id in movie_titles_map:\n",
    "            score = 0\n",
    "            # A movie without any ratings simply keeps a score of 0.\n",
    "            for rater in movie_user_map.get(movie_id, ()):\n",
    "                if rater not in weights:\n",
    "                    continue\n",
    "                deviation = user_movie_map[rater][movie_id] - user_avg_rating[rater]\n",
    "                score += weights[rater] * deviation\n",
    "            scores[movie_id] = score\n",
    "\n",
    "        # Highest score first; movies with equal scores keep their original order.\n",
    "        ranking = sorted(scores.items(), key=operator.itemgetter(1), reverse=True)\n",
    "        cache[active_user] = ranking\n",
    "\n",
    "    for movie_id, _score in ranking[:K]:\n",
    "        print(movie_titles_map[movie_id])\n",
    "        \n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 189,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Tomb Raider(2001)\n",
      "Sweet Home Alabama(2002)\n",
      "13 Going on 30(2004)\n",
      "The Cannonball Run(1981)\n",
      "Johnny Mnemonic(1995)\n"
     ]
    }
   ],
   "source": [
    "# Print the top K recommendations for one example user.\n",
    "active_user = 2482502\n",
    "K = 5\n",
    "recommendation(active_user, K)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
